rm(list=ls())
# Check that required packages are installed:
want = c("foreign", "ggplot2", "MASS", "reshape2", "plyr", "dplyr", "ggrepel", 
         "lme4", "coda", "relaimpo", "relimp", "arm", "boot", "entropy", "here")
have = want %in% rownames(installed.packages())
if ( any(!have) ) { install.packages( want[!have] ) }
# load packages
junk <- lapply(want, library, character.only = TRUE)
rm(have,want,junk)

options(scipen = 999,
        warn = 1)

#=============================================================================================================
# Functions
#=============================================================================================================
# Sample mean
sample.mean <- function(x, d) {
  return(mean(x = x[d], na.rm = T))
}

#=============================================================================================================
#                     
# Part 1:
# RELATIVE IMPORTANCE
#
#=============================================================================================================

# Load UNSTACKED data
data <- read.dta("EES_2009_unstacked.dta")

# Remove cases with missing values in our variables
data.un <- data[!is.na(data$lrsp_cent) & !is.na(data$educ) & !is.na(data$female) & !is.na(data$age) & 
                  !is.na(data$standard) & !is.na(data$class) & 
                  !is.na(data$churchat) & !is.na(data$relig) & 
                  !is.na(data$close_party_full_np) & 
                  !is.na(data$polint) & !is.na(data$polinfo) & 
                  !is.na(data$i2ImmCus) & !is.na(data$i1PriEnt) & !is.na(data$i3SamSex) & 
                  !is.na(data$i1StaOwn) & !is.na(data$i3AboFree) & !is.na(data$i1IntEco) &
                  !is.na(data$i3HarSen) & !is.na(data$i1OrdRed) & !is.na(data$i3ObeAut) & 
                  !is.na(data$i3WorCut) & !is.na(data$i2ImmDec), ]
rm(data)

### Set up parametric bootstrap, Silber et al. method, OLS
set.seed(461982)
n.samples <- 1000
# Create empty data frame where to put the values
ratio.lm.boot <- data.frame(syslab = unique(data.un$syslab),
                            ri_ln_m = as.numeric(NA), ri_ln_sd = as.numeric(NA), 
                            ri_ln_lo = as.numeric(NA), ri_ln_hi = as.numeric(NA),
                            ri_exp_m = as.numeric(NA), ri_exp_sd = as.numeric(NA),
                            ri_exp_lo = as.numeric(NA), ri_exp_hi = as.numeric(NA),
                            ri_l2_m = as.numeric(NA), ri_l2_sd = as.numeric(NA), 
                            ri_l2_lo = as.numeric(NA), ri_l2_hi = as.numeric(NA))

for (i in unique(data.un$syslab)) {
  pties <- names(table(subset(data.un, syslab == i)$close_party_full))
  ivs <- c("female", "age", "edlow", "edhigh", "clasd1", "clasd2", "clasd4", "clasd5",
           "churchat", "churchat_sq", "standard", "standard_sq", "relig", "relig_sq",
           "i2ImmCus", "i1PriEnt", "i3SamSex", "i1StaOwn", "i3AboFree", "i1IntEco",
           "i3HarSen", "i1OrdRed", "i3ObeAut", "i3WorCut", "i2ImmDec",
           "i2ImmCus_sq", "i1PriEnt_sq", "i3SamSex_sq", "i1StaOwn_sq", "i3AboFree_sq", "i1IntEco_sq",
           "i3HarSen_sq", "i1OrdRed_sq", "i3ObeAut_sq", "i3WorCut_sq", "i2ImmDec_sq")
  np <- paste0("close_str_", pties)
  fml <- as.formula(paste0("lrsp ~ ", paste(c(ivs, np), collapse = "+")))
  # Remove missings
  no.na.data <- subset(data.un, syslab == i)
  no.na.data <- no.na.data[rowSums(is.na(no.na.data[, np])) == 0, ]
  mod <- lm(formula = fml,
            data = no.na.data)
  s.1 <- which(names(coef(mod)) %in% np)
  s.2 <- which(names(coef(mod)) %in% ivs[5:36])
  n <- 1
  res <- rep(NA, n.samples)
  res.exp <- rep(NA, n.samples)
  res.l2 <- rep(NA, n.samples)
  while(n <= n.samples){
    fml <- as.formula(paste0("unlist(simulate(mod)) ~ ", paste(c(ivs, np), collapse = "+")))
    mod_sim <- lm(formula = fml,
                  data = no.na.data)
    r.i <- relimp(mod_sim, set1 = s.1, set2 = s.2)
    res[n] <- r.i$log.ratio
    res.exp[n] <- exp(res[n])
    res.l2[n] <- log(res.exp[n],2)
    n <- n + 1
  }
  # Natural log (comes as default)
  ratio.lm.boot[ratio.lm.boot$syslab == i, "ri_ln_m"] <-  quantile(res, p = 0.5)
  ratio.lm.boot[ratio.lm.boot$syslab == i, "ri_ln_sd"] <-  sd(res)
  ratio.lm.boot[ratio.lm.boot$syslab == i, "ri_ln_lo"]  <-  quantile(res, p = 0.025)
  ratio.lm.boot[ratio.lm.boot$syslab == i, "ri_ln_hi"] <-  quantile(res, p = 0.975)
  # Exponentiated
  ratio.lm.boot[ratio.lm.boot$syslab == i, "ri_exp_m"] <-  quantile(res.exp, p = 0.5)
  ratio.lm.boot[ratio.lm.boot$syslab == i, "ri_exp_sd"] <-  sd(res.exp)
  ratio.lm.boot[ratio.lm.boot$syslab == i, "ri_exp_lo"] <-  quantile(res.exp, p = 0.025)
  ratio.lm.boot[ratio.lm.boot$syslab == i, "ri_exp_hi"] <-  quantile(res.exp, p = 0.975)
  # Log 2 (reported in the paper)
  ratio.lm.boot[ratio.lm.boot$syslab == i, "ri_l2_m"] <-  quantile(res.l2, p = 0.5)
  ratio.lm.boot[ratio.lm.boot$syslab == i, "ri_l2_sd"] <-  sd(res.l2)
  ratio.lm.boot[ratio.lm.boot$syslab == i, "ri_l2_lo"] <-  quantile(res.l2, p = 0.025)
  ratio.lm.boot[ratio.lm.boot$syslab == i, "ri_l2_hi"] <-  quantile(res.l2, p = 0.975)
  print(i)
  rm(pties, ivs, np, fml, no.na.data, mod, mod_sim, s.1, s.2, r.i, res, res.exp, res.l2)
}

# FIGURE 2
quartz(type = "pdf", file = "FIGURE_2.pdf",
       width = 7, height = 5, dpi = 300)
ggplot(ratio.lm.boot, aes(x = syslab, y = ri_l2_m)) + 
  geom_point() + 
  geom_hline(aes(yintercept = 0), linetype = 3, color = "gray30") +
  geom_errorbar(aes(ymin = ri_l2_lo, ymax = ri_l2_hi), width = 0) +
  coord_flip() +
  scale_x_discrete(limits = (ratio.lm.boot[rev(order(ratio.lm.boot$ri_l2_m)), "syslab"])) +
  theme_bw() +
  xlab("") + 
  ylab("Importance Ratio (\U03C9)\nSymbolic versus Substantive Component of the Left-Right\n(Bootstrapped 95% C.I.)")
dev.off()

# Does it correlate with the number of partisan groups in each country?
partisan_gr <- ddply(data.un, .(syslab), summarise, n_partisan_gr = length(table(close_party_full)))
ratio.npart.test <- merge(ratio.lm.boot, partisan_gr, by = "syslab")
cor.test(ratio.npart.test$ri_l2_m, ratio.npart.test$n_partisan_gr) # No correlation
rm(ratio.npart.test)
# Does it correlate with the share of partisans in each country?
sh.part <- ddply(data.un, .(syslab), summarise, share = mean(close_dummy_full))
ratio.share.test <- merge(ratio.lm.boot, sh.part, by = "syslab")
cor.test(ratio.share.test$ri_l2_m,ratio.share.test$share) # No correlation
rm(ratio.share.test, sh.part)


#=============================================================================================================
#
# Part 2:
# PERCEPTIONS
#
#=============================================================================================================
# Load the STACKED data
data <- read.dta("EES_2009_stacked.dta")
# Merge with Silber et al. measure of relative importance
data <- merge(data, ratio.lm.boot, by = "syslab")
# Count the number of parties in each country from the unstacked data
  data.un$num_parties <- rowSums(!is.na(data.un[grep("l_r_ch", names(data.un))]))
  n_parties <- ddply(data.un, .(syslab), summarise, n_parties = mean(num_parties))
  n_parties$n_parties_medcen <- n_parties$n_parties - median(n_parties$n_parties)
# Merge stacked data with number of parties in each country (uncentered and centered around the median)
data <- merge(data, n_parties, by = "syslab")
# Drop cases with missing values in the IVs
data <- data[!is.na(data$dist_p) & !is.na(data$dist_ch) & !is.na(data$dist_diff) & 
               !is.na(data$pid) & !is.na(data$antipid) & 
               !is.na(data$lrext) & !is.na(data$center) & 
               !is.na(data$edlow) & !is.na(data$edhigh) & !is.na(data$female) & !is.na(data$age_gsd) & 
               !is.na(data$polint_gsd) & !is.na(data$polinfo_gsd), ]
# Center Polarization around grand mean
data$ch_polar <- data$ch_polar - mean(unique(data$ch_polar))

#=============================================================================================================
# 2.1 - DISCRIMINATION 
#=============================================================================================================
# Percentage positioned in the correct side (according to the experts)
correct <- ddply(data, .(syslab,id), summarise,
                 p.c = sum(correct_p)/length(!is.na(correct_p)))

correct.ag <- ddply(correct, .(syslab), summarise,
                    mean.c = mean(p.c),
                    sd.c = sd(p.c),
                    up.c = quantile(boot(p.c, sample.mean, R = 1000)$t, p = 0.975),
                    lo.c = quantile(boot(p.c, sample.mean, R = 1000)$t, p = 0.025))
correct.ag <- merge(correct.ag, ratio.lm.boot, by = "syslab")

# FIGURE 3
quartz(type = "pdf", file = "FIGURE_3.pdf", 
       width = 8, height = 7, dpi = 300)
ggplot(correct.ag, aes(x = ri_l2_m, y = mean.c, weigths = 1/sqrt(sd.c*ri_l2_sd))) +
  geom_errorbar(aes(ymin = lo.c, ymax = up.c), col = "gray75") +
  geom_errorbarh(aes(xmin = ri_l2_lo, xmax = ri_l2_hi), col = "gray75") +
  geom_smooth(col = "gray40", method = "lm", se = F) +
  geom_point(colour = "black") + 
  geom_text_repel(aes(label = syslab), size = 3) +
  theme_bw() +
  xlab("Importance ratio (\U03C9)\n(Gray lines are 95% bootstrapped confidence intervals)") + 
  ylab("Share of parties placed on correct side\n(Gray lines are 95% bootstrapped confidence intervals)")
dev.off()

rm(correct)
cor.test(correct.ag$ri_l2_m, correct.ag$mean.c)

#=============================================================================================================
# 2.2 - GENERALIZATION 
#=============================================================================================================
# Entropy party placement for each side
entr.lr <- ddply(subset(data, lrg_ch != 3 & center == 0  & all_same_lrp == 0), 
                 .(syslab, id, lrg_ch), summarise,
                 lr.e = entropy(table(lrp))/log(length(!is.na(lrp))),
                 tot = length(!is.na(lrp)),
                 sameside = mean(sameside_ch),
                 otherside = mean(otherside_ch))
entr.lr <- entr.lr[entr.lr$tot > 1, ]
entr.lr$group <- NA
entr.lr[entr.lr$sameside == 1, ]$group <- "In-group Parties"
entr.lr[entr.lr$otherside == 1, ]$group <- "Out-group Parties"

entropy.ag <- ddply(entr.lr,.(syslab, group), summarise,
                    mean.e = mean(lr.e),
                    sd.e = sd(lr.e),
                    up.e = quantile(boot(lr.e, sample.mean, R = 1000)$t, p = 0.975),
                    lo.e = quantile(boot(lr.e, sample.mean, R = 1000)$t, p = 0.025))
entropy.ag <- merge(entropy.ag, ratio.lm.boot, by = "syslab")

# FIGURE 4
quartz(type = "pdf", file = "FIGURE_4.pdf",
       width = 11, height = 6, dpi = 300)
ggplot(entropy.ag, aes(x = ri_l2_m, y = mean.e, weigths = 1/sqrt(sd.e*ri_l2_sd))) +
  geom_errorbar(aes(ymin = lo.e, ymax = up.e), col = "gray75") +
  geom_errorbarh(aes(xmin = ri_l2_lo, xmax = ri_l2_hi), col = "gray75") +
  geom_smooth(col = "gray40", method = "lm", se = F) +
  geom_point(colour = "black") + 
  geom_text_repel(aes(label = syslab), size = 3) +
  facet_wrap(~group, ncol = 2) +
  theme_bw() +
  xlab("Importance ratio (\U03C9) \n(Gray lines are 95% bootstrapped confidence intervals)") + 
  ylab("Normalized Entropy Party Placement\n(Gray lines are 95% bootstrapped confidence intervals)")
dev.off()

rm(entr.lr)
cor.test(entropy.ag$ri_l2_m[entropy.ag$group == "In-group Parties"],
         entropy.ag$mean.e[entropy.ag$group == "In-group Parties"])
cor.test(entropy.ag$ri_l2_m[entropy.ag$group == "Out-group Parties"],
         entropy.ag$mean.e[entropy.ag$group == "Out-group Parties"])


#=============================================================================================================
# 2.3 - REGRESSION MODELS ACCENTUATION EFFECTS
#=============================================================================================================
# EMPTY MODEL (Table 1 in the paper, Table 2 in Appendix)
id.empty <- lmer(dist_diff ~ otherside_ch +
                   (1|id) + (1 + otherside_ch|syslab),
                 data = subset(data, sameside_ch == 1 | otherside_ch == 1))
summary(id.empty)
summary(id.empty)$logLik
AIC(id.empty)
BIC(id.empty)

# Save random effects to be used later for FIGURE 5
ran.eff.full <- data.frame(cbind(c((fixef(id.empty)[1] + ranef(id.empty)$syslab[,1]),
                                   (fixef(id.empty)[1] + fixef(id.empty)[2] + 
                                      ranef(id.empty)$syslab[,1] + ranef(id.empty)$syslab[,2])),
                                 c(se.ranef(id.empty)$syslab[,1], se.ranef(id.empty)$syslab[,2])))

names(ran.eff.full) <- c("COEF","SE")
ran.eff.full$syslab <- rep(rownames(ranef(id.empty)$syslab),2)
ran.eff.full$G <- rep(c("In-group parties","Out-group parties"),each = dim(ranef(id.empty)$syslab)[1])
ran.eff.full <- merge(ran.eff.full, ratio.lm.boot, by = "syslab")

# FULL MODEL - Bootstrapped 
var.boot <- c("syslab", "id", "dist_diff", "otherside_ch", 
              "pid", "antipid", "edlow", "edhigh", "lrext_cen", 
              "polint_cen", "polinfo_cen", "n_parties_medcen", "ch_polar")
n.r <- 500
dat.boot <- subset(data, sameside_ch == 1 | otherside_ch == 1, select = var.boot)
coef.boot <- matrix(nrow = 13, ncol = n.r)
se.boot <- matrix(nrow = 13, ncol = n.r)
varcov.boot <- list()
re.sd.boot <- matrix(nrow = 4, ncol = n.r)
fit.boot <- matrix(nrow = 3, ncol = n.r)
options(warn=1)

set.seed(461982)
for(i in 1:n.r){
  ratio <- data.frame(syslab = ratio.lm.boot$syslab, 
                      ri_l2_m = rnorm(n = nrow(ratio.lm.boot), 
                                      mean = ratio.lm.boot$ri_l2_m, 
                                      sd = ratio.lm.boot$ri_l2_sd))
  dtb <- merge(dat.boot, ratio, by = "syslab")
  m.b <- lmer(dist_diff ~ otherside_ch +
                pid + antipid + 
                edlow + edhigh + 
                lrext_cen + 
                polint_cen + polinfo_cen +
                n_parties_medcen + ch_polar +
                ri_l2_m + ri_l2_m:otherside_ch + 
                (1|id) + (1 + otherside_ch|syslab),
              data = dtb)
  coef.boot[, i] <- fixef(m.b)
  varcov.boot[[i]] <- vcov(m.b)
  se.boot[, i] <- sqrt(diag(varcov.boot[[i]]))
  re.sd.boot[, i] <- c(attr(VarCorr(m.b)$id, "stddev"), 
                       attr(VarCorr(m.b)$syslab, "stddev"), 
                       attr(VarCorr(m.b), "sc")^2)
  fit.boot[, i] <- c(as.numeric(logLik(m.b)), AIC(m.b), BIC(m.b))
  print(i)
  rm(ratio, dtb, m.b)
}
rm(dat.boot)

#=============================================================================================================
# 2.3.1 - Make table (Table 1 in the paper, Table 2 in Appendix)
#=============================================================================================================
# Coefficients
betas <- apply(coef.boot, 1, mean)

# Calculate standard errors using Rubin's rule
se <- sqrt(apply(se.boot^2, 1, mean) + (1 + 1/n.r)*apply(coef.boot, 1, var))

# P-values
pval <- round((1 - pnorm(abs(betas) / se))*2, 4)

# Fit statistics (mean)
fit <- apply(fit.boot, 1, mean)

# Variance random effects
vranef <- apply(re.sd.boot^2, 1, mean) + (1 + 1/n.r)*apply(re.sd.boot^2, 1, var)

# Coefficients and standard errors
data.frame(b = round(betas, 3), se = round(se, 3), p = pval)

#=============================================================================================================
# 2.3.2 - Plot simulated predicted probability
#=============================================================================================================
# Store betas
meanbetas <- betas
# Combined variance-covariance matrix (for simulation)
vcov.bet <- (1/(n.r - 1))*(coef.boot - betas) %*% t(coef.boot - betas)
vcov.wit <- aaply(laply(varcov.boot, as.matrix), c(2, 3), mean)
covmatrix <- vcov.wit + (1 + (1/n.r))*vcov.bet
# Simulation
n.s <- 100
sim.a <- data.frame(NULL)
sim.c <- data.frame(NULL)
set.seed(461982)
MCdata <- mvrnorm(n=1000, meanbetas, covmatrix)
partrange <- seq(min(data$ri_l2_m), max(data$ri_l2_m), length.out = n.s)

for (i in 1:n.s){
  p <- partrange[i]
  betahat.a <- MCdata[, "(Intercept)"] + 0*MCdata[, "otherside_ch"] + 
    mean(data$lrext_cen)*MCdata[, "lrext_cen"] + mean(data$polint_cen)*MCdata[, "polint_cen"] + 
    mean(data$polinfo_cen)*MCdata[, "polinfo_cen"] +
    (p*MCdata[, "ri_l2_m"]) + 0*(p*MCdata[, "otherside_ch:ri_l2_m"])
  betahat.c <- MCdata[, "(Intercept)"] + 1*MCdata[, "otherside_ch"] + 
    mean(data$lrext_cen)*MCdata[, "lrext_cen"] + mean(data$polint_cen)*MCdata[, "polint_cen"] + 
    mean(data$polinfo_cen)*MCdata[, "polinfo_cen"] +
    (p*MCdata[, "ri_l2_m"]) + 1*(p*MCdata[, "otherside_ch:ri_l2_m"])
  p.a <- mean(betahat.a)  
  lo.a <- quantile(betahat.a, probs = c(0.025))
  hi.a <- quantile(betahat.a, probs = c(0.975))
  p.c <- mean(betahat.c)  
  lo.c <- quantile(betahat.c, probs = c(0.025))
  hi.c <- quantile(betahat.c, probs = c(0.975))
  sim.a <- rbind(sim.a, c(p, 2^p, p.a, lo.a, hi.a)) 
  sim.c <- rbind(sim.c, c(p, 2^p, p.c, lo.c, hi.c))
}
colnames(sim.a)<-c("X", "expX", "Y", "LO", "HI") 
colnames(sim.c)<-c("X", "expX", "Y", "LO", "HI") 
sim.ac <- rbind(sim.a, sim.c)
sim.ac$G <- rep(c("In-group parties", "Out-group parties"), each = nrow(sim.ac)/2)


# FIGURE 5
quartz(type = "pdf", file = "FIGURE_5.pdf",
       width = 9, height = 6, dpi = 300)
ggplot(sim.ac,aes(x = X, y = Y)) + 
  geom_pointrange(data = ran.eff.full, 
                aes(x = ri_l2_m, y = COEF, ymin = COEF - 1.96*SE, ymax = COEF + 1.96*SE), 
                size = 0.2, colour = "gray70") +
  geom_text_repel(data = ran.eff.full, 
            aes(x = ri_l2_m, y = COEF, label = syslab), 
            size = 3, colour = "gray30") +
  geom_line(aes(x=X, y=LO),color="black", linetype = 2) +
  geom_line(aes(x=X, y=HI),color="black", linetype = 2) +
  geom_line(aes(x=X, y=Y),color="black") +
  geom_hline(yintercept = 0, linetype = 3) +
  scale_y_continuous(limits = c(-1.72, 1.38), breaks = seq(-2, 2, by = 0.5)) +
  facet_wrap(~G) +
  theme_bw() +
  xlab("Importance ratio (\U03C9)") + ylab("Predicted Distance Bias\n(95% C.I.)")
dev.off()

rm(MCdata,betahat.a,betahat.c,sim.a,sim.c)


# Save workspace image (to be loaded in other scripts to get appendix material)
save.image("allmodels.RData")
